import re,urllib2


# Print every href="..." attribute found in a saved HTML page.
# The context manager closes the file even if read() raises.
with open('d:/tmp/4.html') as f:
    data = f.read()

# NOTE(review): a GBK decode step (data.decode('gbk')) was disabled here, so
# the pattern runs on the contents exactly as read from disk.
# Raw string keeps the regex escapes intact. [\s\S] matches any character,
# newlines included; +? is non-greedy and needs at least one character after
# the opening quote, so each match ends at the first closing quote it can.
patt = r'href="[\s\S]+?"'

href_list = re.findall(patt, data)
for href in href_list:
    # Single-argument parenthesised form prints the same text as the bare
    # print statement and also parses on newer interpreters.
    print(href)




